%include 'C:\HSPH\manuscripts\Flourishing and work outcomes\Programs\Revision\Flourishing_and_work_230828.sas'; *call in the datastep codes;
options NOFMTERR;

/* ORIGINAL = WELLBEING_P minus every 2018 variable that is supplied again
   later by the imputed data set (imputed directly, or re-derived from the
   imputed items).  The exclusion is applied on output via DROP=. */
data original (drop=
        /* domain scores and overall index */
        emot_hlth_dim2_H2018 phys_hlth_dim_H2018 social_WB_H2018 char_str_H2018
        financ_WB2_H2018 purpose_dim2_H2018 WELL_BEING_eH2018
        /* items feeding the emotional health score */
        lifesat2018 happy72018 expect_good2018 menthlth2018 r_depressed2018
        r_anxiety_surv2018 control2018
        /* items feeding the purpose score */
        understand2018 meaning2018 purpose2018 purpose_und2018 worthylife2018 purs_imp2018
        /* items feeding the social wellbeing score */
        relation_sat2018 people_und2018 r_lonely2018 relation_content2018
        people_help2018 connected2018 trust_comm2018
        /* items feeding the character strengths score */
        doing_good2018 doing_right2018 respect2018 happy_later2018
        difficulties2018 give_up2018 strength_help2018
        /* items feeding the financial wellbeing score */
        expens_diff2018 rw_food_hous_exp2018 savings_suff2018 fin_freedom2018
        fin_future2018 r_debt2018
        /* items feeding the physical health score */
        physhlth2018 no_illness2018 no_rout_sick2018 health_noprev2018
        r_pain2018 hlth_future2018 hlth_maint2018
        /* covariates */
        race22018 age_cat2018c gender2018 mart_status2018c vote2018 educ2018c
        home2018c n_children2018
        elders2018c rel_attend2018_r spirituality2018_r community2018_r
        volunt2018_r num_health_cond2017
        work_hours2018c avg_p502018 flsa_type2018 work_home2018d
        w_meaning2018 w_recognized2018 w_team2018
        /* 2018 values of the work outcomes */
        distracted2018_r distracted2018 jobsatis2018 w_productive2018
    );
    set wellbeing_p;
run;

/* Order ORIGINAL by respondent id. */
proc sort data=original;
    by harvard_indiv_id;
run;

/* IMPUTE: the respondent id plus every variable that takes part in the
   imputation step (2019 outcomes, 2018 scores/items, covariates and the
   2018 outcome values).  Everything else in WELLBEING_P is left out. */
data impute (keep=
        harvard_indiv_id
        /* 2019 outcomes */
        distracted2019 jobsatis2019 w_productive2019
        /* domain scores and overall index */
        emot_hlth_dim2_H2018 phys_hlth_dim_H2018 social_WB_H2018 char_str_H2018
        financ_WB2_H2018 purpose_dim2_H2018 WELL_BEING_eH2018
        /* items feeding the emotional health score */
        lifesat2018 happy72018 expect_good2018 menthlth2018 r_depressed2018
        r_anxiety_surv2018 control2018
        /* items feeding the purpose score */
        understand2018 meaning2018 purpose2018 purpose_und2018 worthylife2018 purs_imp2018
        /* items feeding the social wellbeing score */
        relation_sat2018 people_und2018 r_lonely2018 relation_content2018
        people_help2018 connected2018 trust_comm2018
        /* items feeding the character strengths score */
        doing_good2018 doing_right2018 respect2018 happy_later2018
        difficulties2018 give_up2018 strength_help2018
        /* items feeding the financial wellbeing score */
        expens_diff2018 rw_food_hous_exp2018 savings_suff2018 fin_freedom2018
        fin_future2018 r_debt2018
        /* items feeding the physical health score */
        physhlth2018 no_illness2018 no_rout_sick2018 health_noprev2018
        r_pain2018 hlth_future2018 hlth_maint2018
        /* covariates */
        race22018 age_cat2018c gender2018 mart_status2018c vote2018 educ2018c
        home2018c n_children2018
        elders2018c rel_attend2018_r spirituality2018_r community2018_r
        volunt2018_r num_health_cond2017
        work_hours2018c avg_p502018 flsa_type2018 work_home2018d
        w_meaning2018 w_recognized2018 w_team2018
        /* 2018 values of the work outcomes */
        distracted2018 jobsatis2018 w_productive2018
    );
    set wellbeing_p;
run;

/* Imputation model: 5 imputations written to IMPUTED, fixed seed,
   MCMC with multiple chains and EM starting values.  Printed output is
   suppressed.  The VAR list order is kept exactly as before. */
proc mi data=impute out=imputed nimpute=5 seed=3237851 noprint;
    mcmc chain=multiple initial=em displayinit;
    var
        /* exposure and outcome variables */
        distracted2019 jobsatis2019 w_productive2019
        lifesat2018 happy72018 expect_good2018 menthlth2018 r_depressed2018
        r_anxiety_surv2018 control2018
        understand2018 meaning2018 purpose2018 purpose_und2018 worthylife2018 purs_imp2018
        relation_sat2018 people_und2018 r_lonely2018 relation_content2018
        people_help2018 connected2018 trust_comm2018
        doing_good2018 doing_right2018 respect2018 happy_later2018
        difficulties2018 give_up2018 strength_help2018
        expens_diff2018 rw_food_hous_exp2018 savings_suff2018 fin_freedom2018
        fin_future2018 r_debt2018
        physhlth2018 no_illness2018 no_rout_sick2018 health_noprev2018
        r_pain2018 hlth_future2018 hlth_maint2018
        /* covariates */
        race22018 age_cat2018c gender2018 mart_status2018c vote2018 educ2018c
        home2018c n_children2018
        elders2018c rel_attend2018_r spirituality2018_r community2018_r
        volunt2018_r num_health_cond2017
        work_hours2018c avg_p502018 flsa_type2018 work_home2018d
        w_meaning2018 w_recognized2018 w_team2018
        distracted2018 jobsatis2018 w_productive2018
    ;
run;

/* IMPUTED_R: bring the imputed values back onto the scale of each variable.
     - 45 item-type variables are truncated to the interval [0, 10];
     - avg_p502018 is truncated to [33787.81, 246979.16];
     - num_health_cond2017 is truncated to [0, 12];
     - 17 categorical variables are snapped to the nearest admissible integer
       code, with cut points halfway between codes (a value exactly on a cut
       point goes to the lower code).
   Missing values are left missing.  In SAS a missing value compares lower
   than any number, so an unguarded "x < 0" or "x LE 0.5" test would quietly
   turn a missing value into 0 / the lowest category.
   NOTE(review): PROC MI is expected to have filled every VAR variable; the
   guards only matter if some value is still missing at this point. */
data imputed_r;
    set imputed;

    *continuous variables;
    array cont[45] lifesat2018 happy72018 expect_good2018 menthlth2018 r_depressed2018 r_anxiety_surv2018 control2018
                   understand2018 meaning2018 purpose2018 purpose_und2018 worthylife2018 purs_imp2018
                   relation_sat2018 people_und2018 r_lonely2018 relation_content2018 people_help2018 connected2018 trust_comm2018
                   doing_good2018 doing_right2018 respect2018 happy_later2018 difficulties2018 give_up2018 strength_help2018
                   expens_diff2018 rw_food_hous_exp2018 savings_suff2018 fin_freedom2018 fin_future2018 r_debt2018
                   physhlth2018 no_illness2018 no_rout_sick2018 health_noprev2018 r_pain2018 hlth_future2018 hlth_maint2018
                   w_meaning2018 w_recognized2018 w_team2018 jobsatis2018 w_productive2018;

    do i = 1 to dim(cont);
        /* MIN/MAX skip missing arguments, hence the explicit guard */
        if not missing(cont[i]) then cont[i] = min(max(cont[i], 0), 10);
    end;

    if not missing(avg_p502018) then
        avg_p502018 = min(max(avg_p502018, 33787.81), 246979.16);
    if not missing(num_health_cond2017) then
        num_health_cond2017 = min(max(num_health_cond2017, 0), 12);

    *categorical variables;
    /* catg[k] may only take the integer codes cat_lo[k], ..., cat_hi[k] */
    array catg[17]
          race22018          age_cat2018c     gender2018       mart_status2018c
          vote2018           educ2018c        home2018c        n_children2018
          elders2018c        rel_attend2018_r community2018_r  volunt2018_r
          spirituality2018_r work_hours2018c  flsa_type2018    work_home2018d
          distracted2018;
    array cat_lo[17] _temporary_ (0 1 1 0  1 1 0 0  0 0 0 0  0 1 1 0  1);
    array cat_hi[17] _temporary_ (2 3 2 1  3 4 1 5  1 2 2 2  2 4 2 2  5);

    do i = 1 to dim(catg);
        if not missing(catg[i]) then do;
            /* start at the lowest code and step up while the value lies
               above the cut point (code + 0.5); stop at the highest code */
            level = cat_lo[i];
            do while (level < cat_hi[i] and catg[i] > level + 0.5);
                level = level + 1;
            end;
            catg[i] = level;
        end;
    end;

    /* work variables of this step only; keep them out of the output */
    drop i level;
run;

/* Distribution check on the recoded imputed data.
   IMPUTED_R is first put in _Imputation_ order; the summary below is then
   computed over all rows together (no BY statement). */
proc sort data=imputed_r;
    by _Imputation_;
run;

proc means data=imputed_r n nmiss mean std min max;
    var
        /* domain scores */
        emot_hlth_dim2_H2018 phys_hlth_dim_H2018 social_WB_H2018 char_str_H2018
        financ_WB2_H2018 purpose_dim2_H2018
        /* items */
        lifesat2018 happy72018 expect_good2018 menthlth2018 r_depressed2018
        r_anxiety_surv2018 control2018
        understand2018 meaning2018 purpose2018 purpose_und2018 worthylife2018 purs_imp2018
        relation_sat2018 people_und2018 r_lonely2018 relation_content2018
        people_help2018 connected2018 trust_comm2018
        doing_good2018 doing_right2018 respect2018 happy_later2018
        difficulties2018 give_up2018 strength_help2018
        expens_diff2018 rw_food_hous_exp2018 savings_suff2018 fin_freedom2018
        fin_future2018 r_debt2018
        physhlth2018 no_illness2018 no_rout_sick2018 health_noprev2018
        r_pain2018 hlth_future2018 hlth_maint2018
        /* remaining numeric covariates and 2018 outcomes */
        avg_p502018 num_health_cond2017 w_meaning2018 w_recognized2018 w_team2018
        distracted2018 jobsatis2018 w_productive2018
    ;
run;

/* One-way frequency tables of the recoded categorical covariates. */
proc freq data=imputed_r;
    tables
        race22018 age_cat2018c gender2018 mart_status2018c
        vote2018 educ2018c home2018c n_children2018 elders2018c
        rel_attend2018_r spirituality2018_r community2018_r volunt2018_r
        work_hours2018c work_home2018d flsa_type2018
    ;
run;

/* Remove the 2019 outcome columns from IMPUTED_R (data set rewritten in place). */
data imputed_r (drop=distracted2019 jobsatis2019 w_productive2019);
    set imputed_r;
run;

/* Put both inputs of the upcoming merge in respondent-id order. */
proc sort data=imputed_r;
    by harvard_indiv_id;
run;

proc sort data=original;
    by harvard_indiv_id;
run;


/*---------------------------------------------------------------------
  MERGE DATASETS
  Match-merge the recoded imputed data with ORIGINAL on respondent id.
---------------------------------------------------------------------*/
data merged_mi;
    merge imputed_r original;
    by harvard_indiv_id;

    /*-----------------------------------------------------------------
      DERIVE VARIABLES
      Each domain score is the mean of its non-missing items.
    -----------------------------------------------------------------*/
    emot_hlth_dim2_H2018 = mean(lifesat2018, happy72018, expect_good2018, menthlth2018,
                                r_depressed2018, r_anxiety_surv2018, control2018);
    phys_hlth_dim_H2018  = mean(physhlth2018, no_illness2018, no_rout_sick2018, health_noprev2018,
                                r_pain2018, hlth_future2018, hlth_maint2018);
    social_WB_H2018      = mean(relation_sat2018, people_und2018, r_lonely2018, relation_content2018,
                                people_help2018, connected2018, trust_comm2018);
    char_str_H2018       = mean(doing_good2018, doing_right2018, respect2018, happy_later2018,
                                difficulties2018, give_up2018, strength_help2018);
    financ_WB2_H2018     = mean(expens_diff2018, rw_food_hous_exp2018, savings_suff2018,
                                fin_freedom2018, fin_future2018, r_debt2018);
    purpose_dim2_H2018   = mean(understand2018, meaning2018, purpose2018, purpose_und2018,
                                worthylife2018, purs_imp2018);

    label
        emot_hlth_dim2_H2018 = 'emotional health dimension 2018_Harvard'
        phys_hlth_dim_H2018  = 'physical health dimension 2018_Harvard'
        social_WB_H2018      = 'social wellbeing dimension 2018_Harvard'
        char_str_H2018       = 'character strengths dimension 2018_Harvard'
        financ_WB2_H2018     = 'financial wellbeing dimension 2018_Harvard'
        purpose_dim2_H2018   = 'purpose in life dimension 2018_Harvard'
    ;

    /* overall index: mean of the six domain scores */
    WELL_BEING_eH2018 = mean(emot_hlth_dim2_H2018, phys_hlth_dim_H2018, social_WB_H2018,
                             char_str_H2018, financ_WB2_H2018, purpose_dim2_H2018);

    /* leave-one-domain-out indices: plain sum of the other five scores
       divided by 5 (missing if any of the five is missing) */
    WELL_BEING_eH2018_em = (phys_hlth_dim_H2018 + financ_WB2_H2018 + char_str_H2018
                            + social_WB_H2018 + purpose_dim2_H2018) / 5;
    WELL_BEING_eH2018_ph = (emot_hlth_dim2_H2018 + financ_WB2_H2018 + char_str_H2018
                            + social_WB_H2018 + purpose_dim2_H2018) / 5;
    WELL_BEING_eH2018_sc = (emot_hlth_dim2_H2018 + phys_hlth_dim_H2018 + financ_WB2_H2018
                            + char_str_H2018 + purpose_dim2_H2018) / 5;
    WELL_BEING_eH2018_ch = (emot_hlth_dim2_H2018 + phys_hlth_dim_H2018 + financ_WB2_H2018
                            + social_WB_H2018 + purpose_dim2_H2018) / 5;
    WELL_BEING_eH2018_fn = (emot_hlth_dim2_H2018 + phys_hlth_dim_H2018 + char_str_H2018
                            + social_WB_H2018 + purpose_dim2_H2018) / 5;
    WELL_BEING_eH2018_ps = (emot_hlth_dim2_H2018 + phys_hlth_dim_H2018 + char_str_H2018
                            + social_WB_H2018 + financ_WB2_H2018) / 5;

    /* recode work distraction 2018: category code -> numeric score;
       any other value leaves distracted2018_r missing */
    select (distracted2018);
        when (1) distracted2018_r = 0;
        when (2) distracted2018_r = 0.075;
        when (3) distracted2018_r = 0.175;
        when (4) distracted2018_r = 0.375;
        when (5) distracted2018_r = 0.75;
        otherwise;
    end;
run;

/* Display formats for the categorical covariates.
   Each VALUE statement maps the stored numeric codes to the text shown in
   printed tables. */
proc format;
    value age_cat2018c
        1 = '30 or below'
        2 = '31-50'
        3 = 'above 50';

    value gender2018c
        1 = 'female'
        2 = 'male';

    value race22018c
        0 = 'white (not Hispanic)'
        1 = 'Black/African-American'
        2 = 'Other';

    value mart_status2018c
        0 = 'Unmarried'
        1 = 'Married or in partnership';

    value educ2018c
        1 = 'high school diploma or eqivalent'
        2 = 'Some college but no degree'
        3 = 'College degree'
        4 = 'Graduate school';

    value home2018c
        0 = 'no'
        1 = 'yes';

    /* n_children2018 is recoded to the codes 0-5 after imputation, so every
       code from 1 upwards has to fall under 'have children'.  With only
       "1=" listed, codes 2-5 would print unlabelled and 'have children'
       would mean exactly one child. */
    value n_children2018c
        0        = 'no children'
        1 - high = 'have children';

    value elders2018c
        0 = 'no elderly'
        1 = 'have elderly';

    value work_hours2018c
        1 = '<8h'
        2 = '8h'
        3 = '9-10h'
        4 = '>10h';

    value flsa_type2018c
        1 = 'exempt'
        2 = 'non-exempt';

    value work_home2018d
        0 = '0 days/week'
        1 = '1-4 days/week'
        2 = '5 days/week';

    value rel_attend2018_r
        0 = 'never'
        1 = '<1x/wk'
        2 = '>=1x/wk';

    value community2018_r
        0 = 'never'
        1 = '<1x/wk'
        2 = '>=1x/wk';

    value volunt2018_r
        0 = 'never'
        1 = '<1x/wk'
        2 = '>=1x/wk';

    value spirituality2018_r
        0 = 'never'
        1 = '<1x/wk'
        2 = '>=1x/wk';

    value vote2018c
        1 = 'yes'
        2 = 'no'
        3 = 'not a register voter';
run;

/* Summary statistics of the continuous variables in MERGED_MI before they
   are standardized (all rows together). */
proc means data=merged_mi n nmiss mean std min max;
    var
        /* 2019 outcomes */
        distracted2019_r jobsatis2019 w_productive2019
        /* overall index and domain scores */
        WELL_BEING_eH2018 emot_hlth_dim2_H2018 phys_hlth_dim_H2018 social_WB_H2018
        char_str_H2018 financ_WB2_H2018 purpose_dim2_H2018
        /* leave-one-domain-out indices */
        WELL_BEING_eH2018_em WELL_BEING_eH2018_ph WELL_BEING_eH2018_sc
        WELL_BEING_eH2018_ch WELL_BEING_eH2018_fn WELL_BEING_eH2018_ps
        /* items */
        lifesat2018 happy72018 expect_good2018 menthlth2018 r_depressed2018
        r_anxiety_surv2018 control2018
        understand2018 meaning2018 purpose2018 purpose_und2018 worthylife2018 purs_imp2018
        relation_sat2018 people_und2018 r_lonely2018 relation_content2018
        people_help2018 connected2018 trust_comm2018
        doing_good2018 doing_right2018 respect2018 happy_later2018
        difficulties2018 give_up2018 strength_help2018
        expens_diff2018 rw_food_hous_exp2018 savings_suff2018 fin_freedom2018
        fin_future2018 r_debt2018
        physhlth2018 no_illness2018 no_rout_sick2018 health_noprev2018
        r_pain2018 hlth_future2018 hlth_maint2018
        /* numeric covariates and 2018 outcomes */
        avg_p502018 num_health_cond2017 w_meaning2018 w_recognized2018 w_team2018
        distracted2018_r jobsatis2018 w_productive2018
    ;
    title 'Check distribution of continuous variables before standardization';
run;

/* Standardization: within each imputation, rescale every listed variable to
   mean 0 and standard deviation 1.  MERGED_MI is overwritten with the result. */
proc sort data=merged_mi;
    by _Imputation_;
run;

proc standard data=merged_mi out=merged_mi mean=0 std=1;
    by _Imputation_;
    var
        /* 2019 outcomes */
        distracted2019_r jobsatis2019 w_productive2019
        /* overall index and domain scores */
        WELL_BEING_eH2018 emot_hlth_dim2_H2018 phys_hlth_dim_H2018 social_WB_H2018
        char_str_H2018 financ_WB2_H2018 purpose_dim2_H2018
        /* leave-one-domain-out indices */
        WELL_BEING_eH2018_em WELL_BEING_eH2018_ph WELL_BEING_eH2018_sc
        WELL_BEING_eH2018_ch WELL_BEING_eH2018_fn WELL_BEING_eH2018_ps
        /* items */
        lifesat2018 happy72018 expect_good2018 menthlth2018 r_depressed2018
        r_anxiety_surv2018 control2018
        understand2018 meaning2018 purpose2018 purpose_und2018 worthylife2018 purs_imp2018
        relation_sat2018 people_und2018 r_lonely2018 relation_content2018
        people_help2018 connected2018 trust_comm2018
        doing_good2018 doing_right2018 respect2018 happy_later2018
        difficulties2018 give_up2018 strength_help2018
        expens_diff2018 rw_food_hous_exp2018 savings_suff2018 fin_freedom2018
        fin_future2018 r_debt2018
        physhlth2018 no_illness2018 no_rout_sick2018 health_noprev2018
        r_pain2018 hlth_future2018 hlth_maint2018
        /* numeric covariates and 2018 outcomes */
        avg_p502018 num_health_cond2017 w_meaning2018 w_recognized2018 w_team2018
        distracted2018_r jobsatis2018 w_productive2018
    ;
run;

/* Summary statistics of the same continuous variables in MERGED_MI once
   they have been standardized (all rows together). */
proc means data=merged_mi n nmiss mean std min max;
    var
        /* 2019 outcomes */
        distracted2019_r jobsatis2019 w_productive2019
        /* overall index and domain scores */
        WELL_BEING_eH2018 emot_hlth_dim2_H2018 phys_hlth_dim_H2018 social_WB_H2018
        char_str_H2018 financ_WB2_H2018 purpose_dim2_H2018
        /* leave-one-domain-out indices */
        WELL_BEING_eH2018_em WELL_BEING_eH2018_ph WELL_BEING_eH2018_sc
        WELL_BEING_eH2018_ch WELL_BEING_eH2018_fn WELL_BEING_eH2018_ps
        /* items */
        lifesat2018 happy72018 expect_good2018 menthlth2018 r_depressed2018
        r_anxiety_surv2018 control2018
        understand2018 meaning2018 purpose2018 purpose_und2018 worthylife2018 purs_imp2018
        relation_sat2018 people_und2018 r_lonely2018 relation_content2018
        people_help2018 connected2018 trust_comm2018
        doing_good2018 doing_right2018 respect2018 happy_later2018
        difficulties2018 give_up2018 strength_help2018
        expens_diff2018 rw_food_hous_exp2018 savings_suff2018 fin_freedom2018
        fin_future2018 r_debt2018
        physhlth2018 no_illness2018 no_rout_sick2018 health_noprev2018
        r_pain2018 hlth_future2018 hlth_maint2018
        /* numeric covariates and 2018 outcomes */
        avg_p502018 num_health_cond2017 w_meaning2018 w_recognized2018 w_team2018
        distracted2018_r jobsatis2018 w_productive2018
    ;
    title 'Check distribution of continuous variables after standardization';
run;

/* One-way frequency tables of the categorical covariates in MERGED_MI,
   printed with their display formats. */
proc freq data=merged_mi;
    tables
        race22018 age_cat2018c gender2018 mart_status2018c
        vote2018 educ2018c home2018c n_children2018 elders2018c
        rel_attend2018_r spirituality2018_r community2018_r volunt2018_r
        work_hours2018c work_home2018d flsa_type2018
    ;
    format
        race22018          race22018c.
        age_cat2018c       age_cat2018c.
        gender2018         gender2018c.
        mart_status2018c   mart_status2018c.
        vote2018           vote2018c.
        educ2018c          educ2018c.
        home2018c          home2018c.
        n_children2018     n_children2018c.
        elders2018c        elders2018c.
        rel_attend2018_r   rel_attend2018_r.
        work_home2018d     work_home2018d.
        spirituality2018_r spirituality2018_r.
        community2018_r    community2018_r.
        volunt2018_r       volunt2018_r.
        work_hours2018c    work_hours2018c.
        flsa_type2018      flsa_type2018c.
    ;
    title "check categorical covariates";
run;
